In this notebook we aim to answer the question:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
df = pd.read_csv("dataset_2019_2022_new.csv")
df.transaction_date = pd.to_datetime(df.transaction_date)
df['year'] = df.transaction_date.dt.year
df2 = df.groupby(['department']) \
.agg(
engagement=('commodity', 'count'),
revenue=('price',sum)
).reset_index()
df2
| department | engagement | revenue | |
|---|---|---|---|
| 0 | Cosmetics | 455 | 2528.65 |
| 1 | Deli | 3635 | 15085.84 |
| 2 | Floral | 345 | 2588.94 |
| 3 | Grocery | 25515 | 63534.81 |
| 4 | Meat | 10855 | 52392.34 |
| 5 | Nutrition | 2471 | 8809.72 |
| 6 | Pastry | 4636 | 13725.72 |
| 7 | Pharmaceutical | 11638 | 44603.66 |
| 8 | Produce | 16163 | 37207.43 |
| 9 | Salad Bar | 484 | 1580.44 |
| 10 | Seafood | 1494 | 8922.92 |
fig = go.Figure()
fig.add_trace(go.Scatter(x=df2['engagement'], y=df2['revenue'], mode='markers',
marker=dict(color=df2['revenue']), text=df2['department'])) # hover text goes here
highest = df2.loc[df2['revenue'].idxmax(), 'department']
lowest = df2.loc[df2['revenue'].idxmin(), 'department']
fig.add_annotation(x=df2.loc[df2['engagement'].idxmax(), 'engagement'],
y=df2.loc[df2['revenue'].idxmax(), 'revenue'],
text=highest, showarrow=True, arrowhead=1)
fig.add_annotation(x=df2.loc[df2['engagement'].idxmin(), 'engagement'],
y=df2.loc[df2['revenue'].idxmin(), 'revenue'],
text=lowest, showarrow=True, arrowhead=1)
fig.update_layout(
title='Department - Engagement vs Revenue',
yaxis=dict(title='Revenue'),
xaxis=dict(title='Engagement')
)
fig.show()
This scatter plot illustrates the relationship between engagement and revenue for different departments. Grocery stands out with the highest engagement and highest revenue. Other stands out are Meat, Pharmaceutical, and Produce.
Based on the insight provided, we can allocate marketing efforts and resources to categories with high engagement and revenue to further boost sales and customer satisfaction.
#top 5 commodities by department
df3 = df.groupby(['department', 'commodity']) \
.agg(
engagement=('commodity', 'count'),
revenue=('price',sum)
).reset_index()
top5_df = pd.concat(
[df3[df3.department == d] \
.sort_values('revenue', ascending=False) \
.head(5) for d in df3.department.unique()]).reset_index(drop=True)
top5_df
| department | commodity | engagement | revenue | |
|---|---|---|---|---|
| 0 | Cosmetics | Makeup and treatment | 367 | 1360.05 |
| 1 | Cosmetics | Coupon/misc items | 20 | 799.80 |
| 2 | Cosmetics | Cosmetic accessories | 12 | 155.88 |
| 3 | Cosmetics | Bath | 48 | 152.96 |
| 4 | Cosmetics | Fragrances | 8 | 59.96 |
| 5 | Deli | Deli meats | 1530 | 5912.61 |
| 6 | Deli | Cheese | 795 | 3312.77 |
| 7 | Deli | Chicken/poultry | 388 | 1883.68 |
| 8 | Deli | Prepared food | 260 | 1279.49 |
| 9 | Deli | Salad | 336 | 1161.36 |
| 10 | Floral | Floral plants | 288 | 2236.51 |
| 11 | Floral | Roses | 33 | 310.67 |
| 12 | Floral | Floral accessories | 24 | 41.76 |
| 13 | Grocery | Soft drinks | 1724 | 4194.49 |
| 14 | Grocery | Fluid milk products | 1378 | 3347.96 |
| 15 | Grocery | Cheese | 1267 | 3056.22 |
| 16 | Grocery | Frozen meat | 1028 | 2879.74 |
| 17 | Grocery | Baked bread/buns/rolls | 1259 | 2370.49 |
| 18 | Meat | Beef | 2940 | 17297.40 |
| 19 | Meat | Lunch meat | 2075 | 5572.08 |
| 20 | Meat | Pork | 684 | 5027.56 |
| 21 | Meat | Chicken | 796 | 4283.37 |
| 22 | Meat | Meat - Other | 536 | 3426.20 |
| 23 | Nutrition | Refrigerated | 804 | 3152.84 |
| 24 | Nutrition | Fitness/diet | 236 | 977.56 |
| 25 | Nutrition | Frozen | 220 | 901.04 |
| 26 | Nutrition | Cereal/breakfast | 136 | 636.00 |
| 27 | Nutrition | Water | 275 | 473.36 |
| 28 | Pastry | Cakes | 626 | 3461.57 |
| 29 | Pastry | Bread | 1240 | 2810.56 |
| 30 | Pastry | Breakfast sweets | 1317 | 2732.00 |
| 31 | Pastry | Cookies | 408 | 1702.64 |
| 32 | Pastry | Pies | 328 | 1435.84 |
| 33 | Pharmaceutical | Cigarettes | 569 | 4826.62 |
| 34 | Pharmaceutical | Candy | 2862 | 4603.99 |
| 35 | Pharmaceutical | Hair care products | 606 | 2104.35 |
| 36 | Pharmaceutical | Oral hygiene products | 576 | 1944.93 |
| 37 | Pharmaceutical | Diapers | 177 | 1853.37 |
| 38 | Produce | Salad | 1125 | 2999.69 |
| 39 | Produce | Potatoes | 947 | 2875.05 |
| 40 | Produce | Apples | 942 | 2753.67 |
| 41 | Produce | Berries | 734 | 2675.50 |
| 42 | Produce | Tropical fruit | 2145 | 2570.62 |
| 43 | Salad Bar | Salad | 480 | 1549.04 |
| 44 | Salad Bar | Produce supplies | 4 | 31.40 |
| 45 | Seafood | Seafood-frozen | 872 | 5762.80 |
| 46 | Seafood | Seafood-fresh | 432 | 2621.68 |
| 47 | Seafood | Seafood-misc | 150 | 314.68 |
| 48 | Seafood | Frozen boxed/grocery | 40 | 223.76 |
top5_df.groupby('department').agg(
total_rev=('revenue',sum),
total_eng=('engagement',sum)
).reset_index()
| department | total_rev | total_eng | |
|---|---|---|---|
| 0 | Cosmetics | 2528.65 | 455 |
| 1 | Deli | 13549.91 | 3309 |
| 2 | Floral | 2588.94 | 345 |
| 3 | Grocery | 15848.90 | 6656 |
| 4 | Meat | 35606.61 | 7031 |
| 5 | Nutrition | 6140.80 | 1671 |
| 6 | Pastry | 12142.61 | 3919 |
| 7 | Pharmaceutical | 15333.26 | 4790 |
| 8 | Produce | 13874.53 | 5893 |
| 9 | Salad Bar | 1580.44 | 484 |
| 10 | Seafood | 8922.92 | 1494 |
tmp = top5_df.groupby('department').agg(
total_rev=('revenue',sum),
total_eng=('engagement',sum)
).reset_index()
plt.figure(figsize=(10, 5))
sns.set(style='whitegrid')
ax = sns.barplot(x='department', y='total_rev', data=tmp, palette='Set3')
plt.xticks(rotation=45)
plt.show()
tmp = top5_df.groupby('department').agg(
total_rev=('revenue',sum),
total_eng=('engagement',sum)
).reset_index()
plt.figure(figsize=(10, 5))
sns.set(style='whitegrid')
ax = sns.barplot(x='department', y='total_eng', data=tmp, palette='Set3')
plt.xticks(rotation=45)
plt.show()
#revenue bar chart
fig = go.Figure()
for d in top5_df.commodity.unique():
tmp = top5_df[top5_df.commodity==d].copy()
fig.add_trace(go.Bar(name=d,x=tmp['department'],y=tmp['revenue'],
hovertext=tmp['revenue'].round(2)))
fig.update_layout(barmode='stack')
fig.update_layout(title ='Top 5 Commodities on each Department - Revenue',
yaxis=dict(
title='Revenue'
))
fig.show()
#engagement bar chart
fig = go.Figure()
for d in top5_df.commodity.unique():
tmp = top5_df[top5_df.commodity==d].copy()
fig.add_trace(go.Bar(name=d,x=tmp['department'],y=tmp['engagement'],
hovertext=tmp['engagement'].round(2)))
fig.update_layout(barmode='stack')
fig.update_layout(title ='Top 5 Commodities on each Department - Engagement',
yaxis=dict(
title='Engagement'
))
fig.show()
The analysis of the top 5 commodities on each department based on both revenue and engagement reveals valuable insights into the departmental performance, emphasizing strategic opportunities between revenue generation and customer engagement.
# Average engagement frequency from baby items per household
baby_df = df[df.commodity.str.lower().str.contains('baby|diapers')] \
.groupby(['year','household_type','commodity']) \
.agg(engagement=('commodity','count')).reset_index()
baby_df.groupby(['household_type','commodity']).agg(avg_engagement=('engagement','mean')).reset_index()
| household_type | commodity | avg_engagement | |
|---|---|---|---|
| 0 | 1 adult with kids | Baby foods | 18.500000 |
| 1 | 1 adult with kids | Baby hbc | 15.000000 |
| 2 | 1 adult with kids | Diapers | 10.500000 |
| 3 | 2 adults with kids | Baby foods | 27.250000 |
| 4 | 2 adults with kids | Baby hbc | 21.500000 |
| 5 | 2 adults with kids | Diapers | 13.500000 |
| 6 | 2 adults with no kids | Baby foods | 17.250000 |
| 7 | 2 adults with no kids | Baby hbc | 16.000000 |
| 8 | 2 adults with no kids | Diapers | 10.500000 |
| 9 | Single female | Baby foods | 11.666667 |
| 10 | Single female | Baby hbc | 7.000000 |
| 11 | Single female | Diapers | 2.500000 |
| 12 | Single male | Baby foods | 17.750000 |
| 13 | Single male | Baby hbc | 13.250000 |
| 14 | Single male | Diapers | 7.250000 |
data_baby = []
baby_color = {
'Baby foods': '#ff9999',
'Baby hbc': '#cc99ff',
'Diapers': '#99ffff'
}
for d in baby_df.commodity.unique():
tmp = baby_df[baby_df.commodity==d].groupby(['household_type','commodity']) \
.agg(avg_engagement=('engagement','mean')).reset_index()
tmp['avg_engagement'] = tmp['avg_engagement'].round(0)
data_baby.append(go.Bar(x=tmp.household_type,
y=tmp.avg_engagement,
name=d,
text=tmp.avg_engagement,
marker_color=baby_color[d]))
fig = go.Figure(
data=data_baby,
layout = go.Layout(
title ='Baby Items Engagement Frequency by Household Type',
yaxis=dict(
title='Average Engagement'
)
)
)
fig.show()
Household Type: The grouped bar chart reveals intriguing patterns in average engagement accross baby items per household.
Insights:
Among households with kids, both single and couples, Baby Foods exhibit relatively high average engagement. This suggests that families with children, regardless of household composition, show a strong interest in this commodity.
Similar to Baby Foods, Baby HBC products also demonstrate substantial engagement across different household types. This implies that products catering to the health and beauty needs of infants and children hold a consistent appeal.
Diapers, a fundamental necessity for families with young children, receive significant engagement from households with kids. Interestingly, households with two adults and kids show a higher engagement with this commodity compared to households with just one adult and kids.
Both Single Females and Single Males exhibit lower engagement across all commodities compared to households with kids. Baby Foods garner relatively higher engagement among Single Males, potentially indicating their role as caregivers. Diapers, understandably, have the least engagement across both Single Female and Single Male households.
Households without Kids: Households with two adults and no kids display moderate engagement with Baby Foods and Baby HBC, suggesting that these products might cater to specific needs even in households without children. Diapers exhibit lower engagement in these households, reflecting their lack of relevance.
# Average engagement frequency from baby items per age band
baby_df2 = df[df.commodity.str.lower().str.contains('baby|diapers')] \
.groupby(['year','age_band','commodity']) \
.agg(engagement=('commodity','count')).reset_index()
baby_df2.groupby(['age_band','commodity']).agg(avg_engagement=('engagement','mean')).reset_index()
| age_band | commodity | avg_engagement | |
|---|---|---|---|
| 0 | 19-24 | Baby foods | 45.75 |
| 1 | 19-24 | Baby hbc | 37.50 |
| 2 | 19-24 | Diapers | 21.50 |
| 3 | 25-34 | Baby foods | 15.00 |
| 4 | 25-34 | Baby hbc | 12.25 |
| 5 | 25-34 | Diapers | 8.50 |
| 6 | 35-44 | Baby foods | 4.75 |
| 7 | 35-44 | Baby hbc | 6.00 |
| 8 | 35-44 | Diapers | 5.50 |
| 9 | 45-54 | Baby foods | 24.00 |
| 10 | 45-54 | Baby hbc | 18.50 |
| 11 | 45-54 | Diapers | 11.50 |
data_baby = []
for d in baby_df2.commodity.unique():
tmp = baby_df2[baby_df2.commodity==d].groupby(['age_band','commodity']) \
.agg(avg_engagement=('engagement','mean')).reset_index()
tmp['avg_engagement'] = tmp['avg_engagement'].round(0)
data_baby.append(go.Bar(x=tmp.age_band,
y=tmp.avg_engagement,
name=d, text=tmp.avg_engagement,
marker_color=baby_color[d]))
fig = go.Figure(
data=data_baby,
layout = go.Layout(
title ='Baby Items Engagement Frequency by Age Band',
yaxis=dict(
title='Average Engagement'
)
)
)
fig.show()
Age Band: These insights underscore the need for SuperFoodsMax to tailor its marketing strategies and product offerings according to the distinct preferences and priorities of different age bands. For the 19-24 age group, emphasizing baby care commodities and providing resources for young parents could enhance engagement. In the 25-34 age band, SuperFoodsMax can explore strategies that cater to potential parenting considerations while still valuing personal well-being. For older age bands, focusing on broader wellness offerings while acknowledging reduced engagement with baby care products could be a strategic approach.
dairy_df=df[df.commodity.str.lower().str.contains('milk|cheese|butter|egg|yogurt|cream')] \
.groupby(['year','household_type','commodity','brand']) \
.agg(
engagement=('commodity','count'),
revenue=('price',sum)).reset_index()
dairy_df
| year | household_type | commodity | brand | engagement | revenue | |
|---|---|---|---|---|---|---|
| 0 | 2019 | 1 adult with kids | Butter | national | 2 | 10.64 |
| 1 | 2019 | 1 adult with kids | Butter | private | 4 | 9.95 |
| 2 | 2019 | 1 adult with kids | Canned milk | national | 1 | 2.00 |
| 3 | 2019 | 1 adult with kids | Cheese | national | 88 | 331.66 |
| 4 | 2019 | 1 adult with kids | Cheese | private | 42 | 92.96 |
| ... | ... | ... | ... | ... | ... | ... |
| 306 | 2022 | Single male | Ice cream/milk/sherbets | private | 4 | 8.65 |
| 307 | 2022 | Single male | Milk by-products | private | 5 | 6.15 |
| 308 | 2022 | Single male | Peanut butter/jelly/jams | private | 2 | 3.99 |
| 309 | 2022 | Single male | Yogurt | national | 11 | 15.24 |
| 310 | 2022 | Single male | Yogurt | private | 8 | 4.73 |
311 rows × 6 columns
dairy_df.groupby(['household_type','commodity']).agg(avg_engagement=('engagement','mean')).reset_index()
| household_type | commodity | avg_engagement | |
|---|---|---|---|
| 0 | 1 adult with kids | Butter | 3.750000 |
| 1 | 1 adult with kids | Canned milk | 1.000000 |
| 2 | 1 adult with kids | Cheese | 63.875000 |
| 3 | 1 adult with kids | Eggs | 17.000000 |
| 4 | 1 adult with kids | Fluid milk products | 42.375000 |
| 5 | 1 adult with kids | Ice cream/milk/sherbets | 10.375000 |
| 6 | 1 adult with kids | Milk by-products | 11.125000 |
| 7 | 1 adult with kids | Peanut butter/jelly/jams | 6.125000 |
| 8 | 1 adult with kids | Yogurt | 24.250000 |
| 9 | 2 adults with kids | Butter | 5.375000 |
| 10 | 2 adults with kids | Canned milk | 2.000000 |
| 11 | 2 adults with kids | Cheese | 75.625000 |
| 12 | 2 adults with kids | Eggs | 17.857143 |
| 13 | 2 adults with kids | Fluid milk products | 51.250000 |
| 14 | 2 adults with kids | Ice cream/milk/sherbets | 14.375000 |
| 15 | 2 adults with kids | Milk by-products | 15.500000 |
| 16 | 2 adults with kids | Peanut butter/jelly/jams | 9.285714 |
| 17 | 2 adults with kids | Yogurt | 28.125000 |
| 18 | 2 adults with no kids | Butter | 4.875000 |
| 19 | 2 adults with no kids | Canned milk | 1.250000 |
| 20 | 2 adults with no kids | Cheese | 53.500000 |
| 21 | 2 adults with no kids | Eggs | 11.000000 |
| 22 | 2 adults with no kids | Fluid milk products | 40.750000 |
| 23 | 2 adults with no kids | Ice cream/milk/sherbets | 8.875000 |
| 24 | 2 adults with no kids | Milk by-products | 10.000000 |
| 25 | 2 adults with no kids | Peanut butter/jelly/jams | 7.000000 |
| 26 | 2 adults with no kids | Yogurt | 24.125000 |
| 27 | Single female | Butter | 2.750000 |
| 28 | Single female | Canned milk | 1.000000 |
| 29 | Single female | Cheese | 24.750000 |
| 30 | Single female | Eggs | 5.500000 |
| 31 | Single female | Fluid milk products | 18.333333 |
| 32 | Single female | Ice cream/milk/sherbets | 4.875000 |
| 33 | Single female | Milk by-products | 5.000000 |
| 34 | Single female | Peanut butter/jelly/jams | 2.666667 |
| 35 | Single female | Yogurt | 8.750000 |
| 36 | Single male | Butter | 4.000000 |
| 37 | Single male | Canned milk | 1.000000 |
| 38 | Single male | Cheese | 40.000000 |
| 39 | Single male | Eggs | 10.428571 |
| 40 | Single male | Fluid milk products | 24.125000 |
| 41 | Single male | Ice cream/milk/sherbets | 7.375000 |
| 42 | Single male | Milk by-products | 8.142857 |
| 43 | Single male | Peanut butter/jelly/jams | 4.142857 |
| 44 | Single male | Yogurt | 15.625000 |
data_dairy = []
for d in dairy_df.commodity.unique():
tmp = dairy_df[dairy_df.commodity==d].groupby(['household_type','commodity']) \
.agg(avg_engagement=('engagement','mean')).reset_index()
tmp['avg_engagement'] = tmp['avg_engagement'].round(0)
data_dairy.append(go.Bar(x=tmp.household_type, y=tmp.avg_engagement, name=d, text=tmp.avg_engagement))
fig = go.Figure(
data=data_dairy,
layout = go.Layout(
title ='Dairy Products and Eggs Engagement Frequency by Household Type',
yaxis=dict(
title='Average Engagement'
)
)
)
fig.show()
Insight:
Recommendation:
dairy_df2 = dairy_df.groupby(['commodity','brand']).agg(total_rev=('revenue',sum)) \
.sort_values(['commodity','total_rev'], ascending=False).reset_index()
dairy_df2
| commodity | brand | total_rev | |
|---|---|---|---|
| 0 | Yogurt | national | 877.61 |
| 1 | Yogurt | private | 184.30 |
| 2 | Peanut butter/jelly/jams | national | 307.50 |
| 3 | Peanut butter/jelly/jams | private | 276.85 |
| 4 | Milk by-products | private | 464.18 |
| 5 | Milk by-products | national | 176.75 |
| 6 | Ice cream/milk/sherbets | national | 668.64 |
| 7 | Ice cream/milk/sherbets | private | 405.31 |
| 8 | Fluid milk products | private | 3100.48 |
| 9 | Fluid milk products | national | 247.48 |
| 10 | Eggs | private | 512.47 |
| 11 | Eggs | national | 52.67 |
| 12 | Cheese | national | 4049.76 |
| 13 | Cheese | private | 2319.23 |
| 14 | Canned milk | national | 24.27 |
| 15 | Canned milk | private | 8.14 |
| 16 | Butter | private | 304.74 |
| 17 | Butter | national | 153.76 |
dairy_df2.pivot(index='commodity', columns='brand', values='total_rev') \
.plot(kind='bar', stacked=True, title='Dairy Commodities Revenue by Brands', figsize=(12,6))
plt.xlabel(None)
plt.ylabel('Revenue')
plt.draw()
plt.savefig('images\dairy_by_brand.png',dpi=100)
plt.show()
Insight:
Recommendation:
sf_df=df[df.household_type=='Single female'].copy()
sf_df.groupby('age_band').agg(count=('customer_id',pd.Series.nunique)).reset_index()
| age_band | count | |
|---|---|---|
| 0 | 19-24 | 221 |
| 1 | 25-34 | 54 |
| 2 | 35-44 | 2 |
| 3 | 45-54 | 1 |
| 4 | 55-64 | 1 |
tmp = sf_df.groupby('age_band').agg(count=('customer_id',pd.Series.nunique)).reset_index()
fig = go.Figure(
data=[go.Pie(labels=tmp['age_band'], values=tmp['count'])]
)
fig.show()
sf_df.groupby('department').agg(engagement=('department','count')).reset_index()
| department | engagement | |
|---|---|---|
| 0 | Cosmetics | 44 |
| 1 | Deli | 303 |
| 2 | Floral | 28 |
| 3 | Grocery | 2268 |
| 4 | Meat | 994 |
| 5 | Nutrition | 217 |
| 6 | Pastry | 402 |
| 7 | Pharmaceutical | 1045 |
| 8 | Produce | 1347 |
| 9 | Salad Bar | 36 |
| 10 | Seafood | 200 |
tmp = sf_df.groupby('department').agg(engagement=('department','count')).reset_index()
plt.figure(figsize=(10, 5))
plt.title('Department Engagement for Single Female')
sns.set_style("whitegrid")
ax = sns.barplot(x='department', y='engagement', data=tmp, palette='Set2')
plt.xticks(rotation=45)
plt.savefig('images\single_female_dept.png')
plt.show()
dept = ['Grocery','Produce']
#top 5 grocery items and produce of single female
tmp = pd.concat([sf_df[sf_df.department == d] \
.groupby(['household_type','department','commodity']) \
.agg(engagement=('commodity','count')) \
.sort_values(['engagement'], ascending=False) for d in dept]).reset_index()
sf_top5 = pd.concat(
[tmp[tmp.department == d] \
.sort_values('engagement', ascending=False) \
.head(5) for d in tmp.department.unique()]).reset_index(drop=True)
sf_top5
| household_type | department | commodity | engagement | |
|---|---|---|---|---|
| 0 | Single female | Grocery | Soft drinks | 144 |
| 1 | Single female | Grocery | Baked bread/buns/rolls | 134 |
| 2 | Single female | Grocery | Cheese | 121 |
| 3 | Single female | Grocery | Fluid milk products | 110 |
| 4 | Single female | Grocery | Frozen meat | 91 |
| 5 | Single female | Produce | Tropical fruit | 158 |
| 6 | Single female | Produce | Salad | 112 |
| 7 | Single female | Produce | Apples | 91 |
| 8 | Single female | Produce | Onions | 86 |
| 9 | Single female | Produce | Potatoes | 70 |
plt.figure(figsize=(10, 5))
plt.title('Department Commodity Engagement - Single Female')
sns.set_style("whitegrid")
ax = sns.barplot(x='department', y='engagement', hue='commodity', data=sf_top5, palette='Set3')
ax.set_xticklabels(ax.get_xticklabels(), ha='center')
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.savefig('images\single_female_dept_commodity.png')
plt.show()
sm_df=df[df.household_type=='Single male'].copy()
sm_df.groupby('age_band').agg(count=('customer_id',pd.Series.nunique)).reset_index()
| age_band | count | |
|---|---|---|
| 0 | 19-24 | 384 |
| 1 | 25-34 | 106 |
| 2 | 35-44 | 8 |
tmp = sm_df.groupby('age_band').agg(count=('customer_id',pd.Series.nunique)).reset_index()
fig = go.Figure(
data=[go.Pie(labels=tmp['age_band'], values=tmp['count'])]
)
fig.show()
sm_df.groupby('department').agg(engagement=('department','count')).reset_index()
| department | engagement | |
|---|---|---|
| 0 | Cosmetics | 85 |
| 1 | Deli | 579 |
| 2 | Floral | 49 |
| 3 | Grocery | 3928 |
| 4 | Meat | 1656 |
| 5 | Nutrition | 339 |
| 6 | Pastry | 699 |
| 7 | Pharmaceutical | 1805 |
| 8 | Produce | 2465 |
| 9 | Salad Bar | 60 |
| 10 | Seafood | 373 |
tmp = sm_df.groupby('department').agg(engagement=('department','count')).reset_index()
plt.figure(figsize=(10, 5))
plt.title('Department Engagement for Single Male')
sns.set_style("whitegrid")
ax = sns.barplot(x='department', y='engagement', data=tmp, palette='Set2')
plt.xticks(rotation=45)
plt.savefig('images\single_male_dept.png')
plt.show()
#top 5 grocery items and produce of single male
tmp = pd.concat([sm_df[sm_df.department == d] \
.groupby(['household_type','department','commodity']) \
.agg(engagement=('commodity','count')) \
.sort_values(['engagement'], ascending=False) for d in dept]).reset_index()
sm_top5 = pd.concat(
[tmp[tmp.department == d] \
.sort_values('engagement', ascending=False) \
.head(5) for d in tmp.department.unique()]).reset_index(drop=True)
sm_top5
| household_type | department | commodity | engagement | |
|---|---|---|---|---|
| 0 | Single male | Grocery | Soft drinks | 255 |
| 1 | Single male | Grocery | Cheese | 205 |
| 2 | Single male | Grocery | Baked bread/buns/rolls | 194 |
| 3 | Single male | Grocery | Fluid milk products | 193 |
| 4 | Single male | Grocery | Frozen meat | 177 |
| 5 | Single male | Produce | Tropical fruit | 306 |
| 6 | Single male | Produce | Salad | 184 |
| 7 | Single male | Produce | Vegetables - all others | 177 |
| 8 | Single male | Produce | Onions | 162 |
| 9 | Single male | Produce | Potatoes | 155 |
plt.figure(figsize=(10, 5))
plt.title('Department Commodity Engagement - Single Male')
sns.set_style("whitegrid")
ax = sns.barplot(x='department', y='engagement', hue='commodity', data=sm_top5, palette='Set3')
ax.set_xticklabels(ax.get_xticklabels(), ha='center')
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.savefig('images\single_male_dept_commodity.png')
plt.show()